*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file for Nids Wave 3

* THIS IS 6th INCOME DO FILE - POST-IMPUTATION AGGREGATION DO FILE: 6 OF 7
* THIS DO FILE AGGREGATES THE POST-IMPUTATION INCOME VARIABLES

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "Master Income do file (1 of 7)"

version 12.0

*=====================================================================================================================================

* OPENING DATASET CREATED IN DO FILE "Income - Performing Imputations for missing data on Income varaibles (4 of 7)"

use "$DataOUT\impdata.dta", clear

*-------------------------------------------------------------------------------------------------------------------------------------

* UNIT NON-RESPONSE RATE
*
* hhunitR      : household total of the response indicator
* hhsize_adult : number of non-missing pid values in the household
* hhunitNRR    : 1 - hhunitR/hhsize_adult
* hhunitNRR_6  : hhunitNRR grouped into six labelled bands

sort w1_hhid pid
* total() is the documented name of the egen function formerly called sum()
egen hhunitR=total(response), by(w1_hhid)
egen hhsize_adult=count(pid), by(w1_hhid)
gen hhunitNRR=1-(hhunitR/hhsize_adult)

* The bands are contiguous half-open intervals, so no value strictly between 0 and 1
* can fall between two bands and be left uncoded. As with recode, the new variable
* starts as a copy of hhunitNRR: 0 stays 0, missing stays missing, and any value
* outside [0,1] is carried over unchanged.
gen hhunitNRR_6=hhunitNRR
replace hhunitNRR_6=1 if hhunitNRR>0     & hhunitNRR<0.25
replace hhunitNRR_6=2 if hhunitNRR>=0.25 & hhunitNRR<0.5
replace hhunitNRR_6=3 if hhunitNRR>=0.5  & hhunitNRR<0.75
replace hhunitNRR_6=4 if hhunitNRR>=0.75 & hhunitNRR<1
replace hhunitNRR_6=5 if hhunitNRR==1
label variable hhunitNRR_6 "RECODE of hhunitNRR"
label define hhunitNRR_6 0 "0%" 1 "0%-25%" 2 "25% - 49%" 3 "50% - 74%" 4 "75% - 100%" 5 "100%"
label values hhunitNRR_6 hhunitNRR_6

*-------------------------------------------------------------------------------------------------------------------------------------

* ALLOCATING INCOME TO GENERAL CATEGORIES

***State old-age pension

* Step 1: probit for pension receipt (spen_rec), estimated on observations with age>=60.
* predict stores the fitted probability for every observation with complete covariates.
xi: probit spen_rec age agesq white coloured asian_indian other i.province male i.male*age if age>=60
predict spen_rechat

* Step 2: unit non-response imputation for non-responders (response==0) with age>=60.
* Expected pension = fitted probability times the rate selected by interview month:
*   intmonth<=4 -> 870,  4<intmonth<=10 -> 940,  otherwise -> 960.
* A missing intmonth compares as greater than 10 in Stata, so it receives the 960 rate.
generate spenhat2 = spen_rechat*cond(intmonth<=4, 870, cond(intmonth<=10, 940, 960)) if response==0 & age>=60

* Indicator: 1 where a non-responder has an imputed value available, 0 otherwise
generate spenimpute2 = (response==0 & spenhat2!=.)

* Fill only pension values that are still missing
replace spen = spenhat2 if response==0 & spen==. & spenhat2!=.

* Imputation summary flag for old-age pension income
replace spen_flg = 4 if spenimpute2==1

***Child support grant

* Step 1: probit for child support grant receipt (chld_rec), estimated on all observations.
* predict stores the fitted probability for every observation with complete covariates.
xi: probit chld_rec age agesq white coloured asian_indian other i.province male hhchildren biochildren
predict chld_rechat

* Step 2: child support grant unit non-response imputation for non-responders (response==0).
* Expected grant = fitted probability times the rate selected by interview month:
*   intmonth<=4 -> 210,  4<intmonth<=10 -> 220,  otherwise -> 230.
* A missing intmonth compares as greater than 10 in Stata, so it receives the 230 rate.
generate chldhat2 = chld_rechat*cond(intmonth<=4, 210, cond(intmonth<=10, 220, 230)) if response==0

* Indicator: 1 where a non-responder has an imputed value available, 0 otherwise
generate chldimpute2 = (response==0 & chldhat2!=.)

* Fill only grant values that are still missing
replace chld = chldhat2 if response==0 & chld==. & chldhat2!=.

* Imputation summary flag for child grant income
replace chld_flg = 4 if chldimpute2==1

*-------------------------------------------------------------------------------------------------------------------------------------

* IMPUTATIONS FOR WAGE INCOME FOR NON-RESPONDERS
*
* Non-responders (response==0) get wage income in two passes:
*   1. a two-part model: probit for having non-zero wage income times the
*      exponentiated OLS prediction of log wage income;
*   2. where that leaves wageincome missing, a fallback using the impute command
*      on a reduced covariate list.
* wageincome_flg records which source supplied the final value.

***Wage income for smooth
egen wageincome=rowtotal(fwag cwag swag cheq bonu)
* rowtotal returns 0 when every component is missing, so blank out non-responders explicitly
replace wageincome=. if response==0
gen lnwageincome=ln(wageincome)

*Wage income imputation stage 1 for smooth: probability of non-zero wage income
gen wageincome_d=wageincome!=0 if response==1
replace wageincome_d=. if wageincome==.
xi: probit wageincome_d schooling i.postschool male roster_married age age_d agesq agecu i.province i.race i.hometype i.homewalls i.homeroof homerooms
predict wageincome_dhat
*Wage income imputation stage 2 for smooth: log wage income among positive earners
xi: regress lnwageincome schooling i.postschool male roster_married age age_d agesq agecu i.province i.race i.hometype i.homewalls i.homeroof homerooms if wageincome>0
predict lnwageincomehat
gen wageincomehat=exp(lnwageincomehat)
* Expected value = predicted level times predicted probability of earning
gen wageincomeEV=wageincomehat*wageincome_dhat
gen wageincomeimpute=1 if response==0 & wageincomeEV!=.
replace wageincomeimpute=0 if response==1 | wageincomeEV==.
replace wageincome=wageincomeEV if wageincome==. & wageincomeEV!=. & response==0

*Fallback imputation for non-responders still missing after the two-part model
xi: impute lnwageincome schooling i.postschool male roster_married age age_d agesq i.province i.race homerooms, gen(lnwageincomehat2)
* The variable created by impute is filled for (almost) every observation, so the
* indicator must also require that wageincome is still missing at this point.
* Without that condition every non-responder would be marked, and flag 4 below
* would overwrite flag 2 for values that actually came from the two-part model.
* This line must stay BEFORE the replace that fills wageincome.
gen wageincomeimpute2=1 if response==0 & wageincome==. & lnwageincomehat2!=.
replace wageincome=exp(lnwageincomehat2) if wageincome==. & response==0

*Imputation summary variable for smooth wage income
gen wageincome_flg=1 if wageincome!=.
replace wageincome_flg=2 if wageincomeimpute==1
replace wageincome_flg=3 if wageincome==.
replace wageincome_flg=4 if wageincomeimpute2==1
* The backslash stops Stata from expanding the quoted word as an (undefined) local
* macro, which would otherwise truncate the label text to "Imputed using ".
label define wageincome_flg 1 "Survey*" 2 "Imputed" 3 "Missing" 4 "Imputed using \`impute'"
label values wageincome_flg wageincome_flg

*********************************************************************************

***Allocating income to general categories

*********************************************************************************

* Each category is a row total of its component variables (rowtotal treats missing
* components as zero), followed by its natural log. ln() of zero is missing.

* Total labour market income for smooth
egen labourincome = rowtotal(wageincome help prof extr)
label var labourincome "Monthly income from all labour market sources"
generate lnlabourincome = ln(labourincome)

* Government grant income; totals below 10 are reset to zero before taking logs
egen governmentincome = rowtotal(spen dis chld fost care)
label var governmentincome "Monthly income from government grants"
replace governmentincome = 0 if governmentincome<10
generate lngovernmentincome = ln(governmentincome)

* Other government income (UIF and workers compensation)
egen otherincome = rowtotal(uif comp)
label var otherincome "Monthly income from UIF and/or workers compensation"
generate lnotherincome = ln(otherincome)

* Investment income from stocks, loans, rentals, private pensions, retirement annuities...
egen investmentincome = rowtotal(indi rnt ppen)
label var investmentincome "Monthly income from divideds, interest, priv pension, retirement annuities"
generate lninvestmentincome = ln(investmentincome)

* Income of a capital nature
* Open question: will there be any "other" income left, or will it all be reassigned?
egen capitalincome = rowtotal(inhe retr brid gift loan sale othe)
label var capitalincome "Monthly income of a capital nature (see income paper)"
generate lncapitalincome = ln(capitalincome)

* Remittances (single source variable, copied as-is)
generate remittanceincome = remt
label var remittanceincome "Monthly income from all remittances"
generate lnremittanceincome = ln(remittanceincome)

*-------------------------------------------------------------------------------------------------------------------------------------

* AGGREGATING

***Household income data

* Household-level income categories: each is the within-household (w1_hhid) total of
* the matching individual-level category. A household total of exactly zero is
* recoded to missing.

egen hhwage = total(labourincome), by(w1_hhid)
label var hhwage "Household monthly income from labour market"
replace hhwage = . if hhwage==0

egen hhgovt = total(governmentincome), by(w1_hhid)
label var hhgovt "Household monthly income from government grants"
replace hhgovt = . if hhgovt==0

egen hhother = total(otherincome), by(w1_hhid)
label var hhother "Household monthly income from other government sources"
replace hhother = . if hhother==0

egen hhinvest = total(investmentincome), by(w1_hhid)
label var hhinvest "Household monthly income from investments"
replace hhinvest = . if hhinvest==0

egen hhcapital = total(capitalincome), by(w1_hhid)
label var hhcapital "Household monthly income of a capital nature"
replace hhcapital = . if hhcapital==0

egen hhremitt = total(remittanceincome), by(w1_hhid)
label var hhremitt "Household monthly income from remittances"
replace hhremitt = . if hhremitt==0

* Implied rental income and its source flag are copied from the existing variables
generate hhimprent_inc = imprent
label var hhimprent_inc "Household monthly implied rental income"

generate hhimprent_flg = imprent_flg
label var hhimprent_flg "The monthly household implied rental income data is from:"
label define hhimprent_flg 1 "Survey" 2 "Imputed" 3 "Missing", modify
label values hhimprent_flg hhimprent_flg

*Count of missings or refusals (code -8) to the most important income questions.
*misinc is first counted per person across the listed variables, then replaced by
*the household (w1_hhid) total, so every household member carries the same value.
gen proxyinc=1 if w1_p_emactcur_u==1 | w1_p_emactcur_u==2
gen misinc=0
* NOTE(review): proxyinc is created just above as 1 or missing, so it can never equal
* -8 and its pass through this loop adds nothing. Confirm whether a different
* proxy-questionnaire variable was meant to be checked here.
foreach x of varlist proxyinc remt_rec w1_a_em1 w1_a_ems w1_a_emc w1_a_incgovpen w1_a_incdis w1_a_incchld w1_a_incfos w1_a_inccare {
    replace misinc=misinc+1 if `x'==-8
}
* total() is the documented name of the egen function formerly called sum()
egen temp=total(misinc), by(w1_hhid)
replace misinc=temp
drop temp

*Total smooth income
*
* hhincome is built in three tiers, recorded in hhincome_flg:
*   1 = sum of the individual-level household categories (no -8 answers in the household)
*   2 = household one-shot income (hhq_incb), used where tier 1 is unavailable
*   3 = sum of the individual-level categories despite -8 answers, used where
*       neither tier 1 nor tier 2 supplied a value
* Implied rental income is then added to whichever value was chosen.
egen hhincome=rowtotal(hhwage hhgovt hhother hhinvest hhremitt hhagric)
replace hhincome=. if misinc>0
replace hhincome=. if hhincome==0 & hhq_incb_flg==1 /*This sets to missing those who have zeros in the indiv, but non-zero in the household one-shot*/
gen hhincome_flg=1 if hhincome!=.
replace hhincome=hhq_incb if hhincome==. & hhq_incb_flg==1
replace hhincome_flg=2 if hhincome!=. & hhincome_flg!=1
* Recompute the category total as the last-resort value
egen temp=rowtotal(hhwage hhgovt hhother hhinvest hhremitt hhagric)
replace hhincome=temp if hhincome==. & hhincome_flg==.
replace hhincome_flg=3 if hhincome!=. & hhincome_flg==.
drop temp
* Add implied rental income (rowtotal treats a missing component as zero)
egen temp=rowtotal(hhincome hhimprent_inc)
replace hhincome=temp
drop temp
label variable hhincome_flg "The source of the monthly income data for this individual is:"
label define hhincome_flg 1 "Individual" 2 "Household" 3 "Individual - sig. imp"
label values hhincome_flg hhincome_flg
label variable hhincome "Household monthly income - full imputations"

* Where the one-shot figure was used, the category breakdown is set to missing.
* Variables are named in full (hhgovt, not the abbreviation hhgov) so the loops do not
* depend on varabbrev being on or on hhgov remaining an unambiguous prefix.
foreach var of varlist hhwage hhgovt hhother hhremitt hhinvest {
    replace `var'=. if hhincome_flg==2
}

* Diagnostic only: number of households (first row per w1_hhid after sorting) where
* a single category exceeds total household income
sort w1_hhid pid
foreach var of varlist hhwage hhgovt hhother hhremitt hhinvest {
    count if `var'>hhincome & `var'!=. & w1_hhid!=w1_hhid[_n-1]
}

save "$DataOUT\hhdata.dta", replace

* end of do file 
*=====================================================================================================================================
